"""
spider_man.py

Copyright 2006 Andres Riancho

This file is part of w3af, http://w3af.org/ .

w3af is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.

w3af is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with w3af; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

"""
import os
import time
import traceback

from multiprocessing.dummy import Process

import w3af.core.controllers.output_manager as om
import w3af.core.data.constants.ports as ports

from w3af import ROOT_PATH
from w3af.core.data.url.HTTPResponse import HTTPResponse
from w3af.core.data.request.fuzzable_request import FuzzableRequest
from w3af.core.controllers.daemons.proxy.templates.utils import render
from w3af.core.controllers.plugins.crawl_plugin import CrawlPlugin
from w3af.core.controllers.daemons.proxy import Proxy, ProxyHandler
from w3af.core.controllers.exceptions import RunOnce, ProxyException
from w3af.core.controllers.misc.decorators import runonce

from w3af.core.data.options.opt_factory import opt_factory
from w3af.core.data.options.option_list import OptionList
from w3af.core.data.parsers.doc.url import URL
from w3af.core.data.dc.headers import Headers

# Cohny changed the original http://w3af/spider_man?terminate
# to http://127.7.7.7/spider_man?terminate because in Opera we got
# an error if we used the original one! Thanks Cohny!
#
# A request for TERMINATE_URL received by the spider_man proxy is treated as
# the user's signal to end the session (see LoggingHandler._is_terminate_request).
TERMINATE_URL = URL('http://127.7.7.7/spider_man?terminate')
# Favicon URL on the same fake host; the proxy answers it with a local file
# instead of forwarding it (see LoggingHandler._is_terminate_favicon).
TERMINATE_FAVICON_URL = URL('http://127.7.7.7/favicon.ico')


class spider_man(CrawlPlugin):
    """
    Crawl plugin which starts a local HTTP proxy and feeds the framework
    with the requests that go through it.

    :author: Andres Riancho (andres.riancho@gmail.com)
    :author: Alexander Berezhnoy < alexander.berezhnoy |at| gmail.com >
    """
    def __init__(self):
        CrawlPlugin.__init__(self)

        # User configured parameters
        self._listen_port = ports.SPIDERMAN
        self._listen_address = '127.0.0.1'

        # Internal state: the proxy instance (created in crawl()) and a flag
        # used to announce only the first captured request
        self._proxy = None
        self._first_captured_request = True

    @runonce(exc_class=RunOnce)
    def crawl(self, fuzzable_request, debugging_id):
        """
        Create the logging proxy, tell the user how to use it and run it.

        If the proxy can not be created (ProxyException) the error is sent
        to the output manager and the method returns without running anything.

        :param fuzzable_request: A fuzzable_request instance; the domain of
                                 its URL is handed to the proxy as the target
                                 domain.
        :param debugging_id: A unique identifier for this call to crawl()
                             (not used by this plugin).
        """
        try:
            self._proxy = LoggingProxy(
                self._listen_address,
                self._listen_port,
                self._uri_opener,
                handler_klass=LoggingHandler,
                plugin=self,
                target_domain=fuzzable_request.get_url().get_domain(),
                name='SpiderManProxyThread')
        except ProxyException as proxy_exc:
            om.out.error('%s' % proxy_exc)
            return

        usage = ('spider_man proxy is running on %s:%s.\nPlease configure '
                 'your browser to use these proxy settings and navigate the '
                 'target site.\nTo exit spider_man plugin please navigate'
                 ' to %s .')
        usage_args = (self._listen_address, self._listen_port, TERMINATE_URL)
        om.out.information(usage % usage_args)

        self._proxy.run()

    def send_fuzzable_request_to_core(self, freq):
        """
        Put a captured request in the plugin's output queue and, the first
        time this happens, let the user know that the proxy is being used.

        :param freq: The fuzzable request to enqueue.
        """
        self.output_queue.put(freq)

        if not self._first_captured_request:
            return

        self._first_captured_request = False
        om.out.information('The spider_man plugin processed the first HTTP'
                           ' request.')

    def get_options(self):
        """
        :return: An OptionList with the listen_address and listen_port
                 options, holding the currently configured values.
        """
        # (name, current value, description, type) for each plugin option
        option_specs = (
            ('listen_address',
             self._listen_address,
             'IP address that the spider_man proxy will use to receive'
             ' requests',
             'string'),
            ('listen_port',
             self._listen_port,
             'Port that the spider_man HTTP proxy server will use to receive'
             ' HTTP requests',
             'integer'),
        )

        options = OptionList()

        for name, value, description, type_name in option_specs:
            options.add(opt_factory(name, value, description, type_name))

        return options

    def set_options(self, options_list):
        """
        Store the values configured by the user for the options returned by
        get_options().

        :param options_list: Container indexed by option name which holds
                             the option objects for this plugin.
        :return: No value is returned.
        """
        # (attribute to set, option name to read), applied in this order
        bindings = (
            ('_listen_address', 'listen_address'),
            ('_listen_port', 'listen_port'),
        )

        for attribute, option_name in bindings:
            setattr(self, attribute, options_list[option_name].get_value())

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        long_description = """
        This plugin is a local proxy that can be used to give the framework
        knowledge about the web application when it has a lot of client side
        code like Flash or Java applets. Whenever a w3af needs to test an
        application with flash or javascript, the user should enable this plugin
        and use a web browser to navigate the site using spider_man proxy.

        The proxy will extract information from the user navigation and generate
        the necessary injection points for the audit plugins.

        Another feature of this plugin is to save the cookies that are sent by
        the web application, in order to be able to use them in other plugins.
        So if you have a web application that has a login with cookie session
        management you should enable this plugin, do the login through the
        browser and then let the other plugins spider the rest of the
        application for you. Important note: If you enable web_spider, you
        should ignore the "logout" link.

        Two configurable parameters exist:
            - listen_address
            - listen_port
        """
        return long_description


class LoggingHandler(ProxyHandler):
    """
    Proxy handler used by spider_man: every request received from the
    browser is sent to the plugin (which forwards it to the core) and then
    to the remote web server, except for the special "terminate" URLs which
    are answered locally.
    """

    def handle_request_in_thread(self, flow):
        """
        This method handles EVERY request that was sent by the browser, we
        receive the request and:

            * Check if it's a request to indicate we should finish, if not
            * Parse it and send to the core

        Any exception raised while processing the request is converted into
        an error response, so the browser always receives a reply.

        :param flow: A libmproxy flow containing the request
        """
        http_request = self._to_w3af_request(flow.request)

        uri = http_request.get_uri()
        msg = '[spider_man] Handling request: %s %s'
        om.out.debug(msg % (http_request.get_method(), uri))

        # The grep flag passed to _send_http_request() is only enabled for
        # requests sent to the domain which is being scanned
        grep = uri.get_domain() == self.parent_process.target_domain

        try:
            if self._is_terminate_favicon(http_request):
                http_response = self._create_favicon_response(http_request)
            elif self._is_terminate_request(http_request):
                self._terminate()
                http_response = self._create_terminate_response(http_request)
            else:
                # Send the request to the core
                freq = FuzzableRequest.from_http_request(http_request)
                self.parent_process.plugin.send_fuzzable_request_to_core(freq)

                # Send the request to the remote webserver
                http_response = self._send_http_request(http_request, grep=grep)
        except Exception as e:
            # Deliberately broad: this is the boundary with the browser and
            # the failure is reported to the user as an error response
            trace = str(traceback.format_exc())
            http_response = self._create_error_response(http_request, None, e,
                                                        trace=trace)

        # Useful logging: cookies are set by the server using the Set-Cookie
        # response header ("Cookie" is the request header sent by browsers)
        headers = http_response.get_headers()
        cookie_value, _header_name = headers.iget('set-cookie', None)
        if cookie_value is not None:
            msg = ('The remote web application sent the following'
                   ' cookie: "%s" through the spider-man proxy.\nw3af will use'
                   ' it during the rest of the scan process in order to'
                   ' maintain the session.')
            om.out.information(msg % cookie_value)

        # Send the response (success|error) to the browser
        http_response = self._to_libmproxy_response(flow.request, http_response)
        flow.reply(http_response)

    def _is_terminate_favicon(self, http_request):
        """
        :param http_request: The request received from the browser
        :return: True if the request is for the favicon of the terminate host
        :see: https://github.com/andresriancho/w3af/issues/9135
        """
        return http_request.get_uri() == TERMINATE_FAVICON_URL

    def _create_favicon_response(self, http_response):
        """
        Build a 200 response which contains the favicon shipped with the
        plugin.

        :param http_response: Despite its name (kept for backwards
                              compatibility) this is the HTTP *request* being
                              answered; only its URI is used.
        :return: An HTTPResponse with the icon file contents as body
        """
        favicon = os.path.join(ROOT_PATH,
                               'plugins/crawl/spider_man/favicon.ico')

        headers = Headers((
            ('Connection', 'close'),
            ('Content-type', 'image/vnd.microsoft.icon'),
        ))

        # Context manager makes sure the file handle is closed right away
        with open(favicon, 'rb') as favicon_file:
            body = favicon_file.read()

        request_uri = http_response.get_uri()

        return HTTPResponse(200,
                            body,
                            headers,
                            request_uri,
                            request_uri,
                            msg='Ok')

    def _is_terminate_request(self, http_request):
        """
        :param http_request: The request received from the browser
        :return: True if the user requested the URL that ends the session
        """
        return http_request.get_uri() == TERMINATE_URL

    def _terminate(self):
        """
        Inform the user that the session ended and schedule the proxy stop.

        The stop is performed from a separate multiprocessing.dummy Process
        after a two second sleep, so this method returns immediately;
        presumably the delay gives the handler time to send the terminate
        page to the browser before the proxy goes down.
        """
        om.out.information('The user terminated the spider_man session.')

        def stop(after):
            time.sleep(after)
            self.parent_process.stop()

        Process(target=stop, args=(2, )).start()

    def _create_terminate_response(self, http_response):
        """
        Build the 200 response with the "session finished" HTML page,
        rendered from the spiderman_end.html template.

        :param http_response: Despite its name (kept for backwards
                              compatibility) this is the HTTP *request* being
                              answered; only its URI is used.
        :return: An HTTPResponse with the rendered template as body
        """
        content = render('spiderman_end.html', {})

        headers = Headers((
            ('Connection', 'close'),
            ('Content-type', 'text/html'),
        ))

        request_uri = http_response.get_uri()

        return HTTPResponse(200, content.encode('utf-8'), headers,
                            request_uri,
                            request_uri,
                            msg='Ok')


class LoggingProxy(Proxy):
    def __init__(self, ip, port, uri_opener, handler_klass=LoggingHandler,
                 ca_certs=Proxy.CA_CERT_DIR, name='LoggingProxyThread',
                 target_domain=None, plugin=None):
        """
        Same as the parent constructor, with two extra keyword arguments
        (target_domain and plugin) which are stored as attributes. The
        remaining arguments are handed to the parent unchanged.

        :param target_domain: Stored in self.target_domain
        :param plugin: Stored in self.plugin
        """
        parent_kwargs = {
            'handler_klass': handler_klass,
            'ca_certs': ca_certs,
            'name': name,
        }
        super(LoggingProxy, self).__init__(ip, port, uri_opener,
                                           **parent_kwargs)

        # Extra state, set once the parent initialization is done
        self.target_domain = target_domain
        self.plugin = plugin
